## -----------------------------------------------------------
## Enumerate 150-precinct map with 2,000,000 partitions stored
## -----------------------------------------------------------
library(redist)
library(rgdal)
library(sp)
library(doMC)
library(igraph)
source("../random_submap.R")

## Load data
fl_map <- readOGR(path.expand("../../data/fl"), "FL", verbose = FALSE)

## ------------
## Run the test
## ------------
dir.create("../../data/random_sampling_test")

## Create trial map
map_sub <- random_submap(fl_map, 50)

## Sink edgelist
adjlist <- poly2nb(map_sub, queen = FALSE)
sink(paste0("../../data/random_sampling_test/map_sub.dat"))
for(k in 1:length(adjlist)){
    sub <- adjlist[[k]]
    sub <- sub[sub > k]
    if(length(sub) > 0){
        for(l in 1:length(sub)){
            cat(paste(k, sub[l], collapse = " "))
            cat("\n")
        }
    }
}
sink()

## Reorder edges - this step reorders the edges of edgelist to make
## ZDD construction more efficient
system("python ../ndscut.py <../../data/random_sampling_test/map_sub.dat >../../data/random_sampling_test/map_sub_ordered.dat", wait = TRUE)
system("mv ../../data/random_sampling_test/map_sub_ordered.dat ../../data/random_sampling_test/map_sub.dat", wait = TRUE)

## Sink map nodes to be able to reconstruct submap
sink("../../data/random_sampling_test/map_sub_nodes.dat")
cat(paste(rownames(map_sub@data), collapse = " "), "\n")
sink()

## Construct call to system() - random sample of 500 solutions
call <- "../../enumpart_private/enumpart ../../data/random_sampling_test/map_sub.dat -k 2 -sample 5000 > ../../data/random_sampling_test/solutions.dat"
system(call, wait = TRUE)

## Construct call to system() - output all solutoins
call <- "../../enumpart_private/enumpart ../../data/random_sampling_test/map_sub.dat -k 2 -allsols > ../../data/random_sampling_test/solutions_all.dat"
system(call, wait = TRUE)

## ---------------------------------------------------------
## If re-loading the map for analysis, this is how to subset
## ---------------------------------------------------------
## Load data
fl_map <- readOGR(path.expand("../../data/fl"), "FL", verbose = FALSE)
nodes <- strsplit(readLines("../../data/random_sampling_test/map_sub_nodes.dat"),
                  " ")[[1]]

## First, subset down to right map
fl_sub <- fl_map[which(rownames(fl_map@data) %in% nodes),]

## Then, reorder nodes to be correct order to match - otherwise, won't
## line up properly with original map
fl_sub <- fl_sub[match(nodes, rownames(fl_sub@data)),]

## Convert factor columns to characters, because this particular data is messy
indx <- sapply(fl_sub@data, is.factor)
fl_sub@data[indx] <- lapply(
    fl_sub@data[indx], function(x) as.numeric(as.character(x))
)

## ---------------------
## Get true distribution
## ---------------------
## Load solutions
sols <- readLines("../../data/random_sampling_test/solutions.dat")
sols_all <- readLines("../../data/random_sampling_test/solutions_all.dat")

## Create edgelist
el <- strsplit(readLines("../../data/random_sampling_test/map_sub.dat"),
               split = " ")
el <- apply(do.call(rbind, el), 2, as.numeric)

## Convert solutions list into congressional district assignments
## Random sampling code first
ndists <- 2
sols_out <- mclapply(1:length(sols), function(x){
    sol_split <- as.numeric(strsplit(sols[x], split = " ")[[1]])
    el_sub <- el[sol_split,]
    comps_out <- components(graph_from_edgelist(el_sub, directed = FALSE))
    if(comps_out$no < ndists){
        max_num <- max(comps_out$membership)
        comps_out <- c(comps_out$membership, (max_num + 1):ndists)
    }else{
        comps_out <- comps_out$membership
    }
    return(comps_out)
}, mc.cores = detectCores())
sols_out <- do.call(cbind, sols_out)

## All solutions next
sols_out_all <- mclapply(1:length(sols_all), function(x){
    sol_split <- as.numeric(strsplit(sols_all[x], split = " ")[[1]])
    el_sub <- el[sol_split,]
    comps_out <- components(graph_from_edgelist(el_sub, directed = FALSE))
    if(comps_out$no < ndists){
        max_num <- max(comps_out$membership)
        comps_out <- c(comps_out$membership, (max_num + 1):ndists)
    }else{
        comps_out <- comps_out$membership
    }
    return(comps_out)
}, mc.cores = detectCores())
sols_out_all <- do.call(cbind, sols_out_all)

## Summary statistics for both
seg_truth <- unlist(mclapply(1:ncol(sols_out), function(x){
    T <- sum(fl_sub@data$pop)
    P <- sum(fl_sub@data$mccain) / T
    t_i <- tapply(fl_sub@data$pop, sols_out[,x], sum)
    p_i <- tapply(fl_sub@data$mccain, sols_out[,x], sum) / t_i
    return(sum(t_i * abs(p_i - P) / (2 * T * P * (1 - P)), na.rm = TRUE))
}, mc.cores = detectCores()))

seg_truth_all <- unlist(mclapply(1:ncol(sols_out_all), function(x){
    T <- sum(fl_sub@data$pop)
    P <- sum(fl_sub@data$mccain) / T
    t_i <- tapply(fl_sub@data$pop, sols_out_all[,x], sum)
    p_i <- tapply(fl_sub@data$mccain, sols_out_all[,x], sum) / t_i
    return(sum(t_i * abs(p_i - P) / (2 * T * P * (1 - P)), na.rm = TRUE))
}, mc.cores = detectCores()))

## Looks like a random sample!
plot(density(seg_truth_all), col = "red")
lines(density(seg_truth), col = "blue")

## ---------------------
## Now compare with MCMC
## ---------------------
mcmc_out <- redist.mcmc(fl_sub, ndists = 2,
                        nsims = 10000, popvec = fl_sub@data$pop)
mcmc_seg <- redist.segcalc(mcmc_out, grouppop = fl_sub@data$mccain,
                           fullpop = fl_sub@data$pop)

plot(density(seg_truth_all), col = "red")
lines(density(seg_truth), col = "blue")
lines(density(mcmc_seg), col = "purple", lty = "dashed")
